library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
trainLabeled <- read.delim("~/GitHub/FCA/Data/trainSet.txt")
validLabeled <- read.delim("~/GitHub/FCA/Data/arcene_valid.txt")
wholeArceneSet <- rbind(trainLabeled,validLabeled)
wholeArceneSet$Labels <- 1*(wholeArceneSet$Labels > 0)
wholeArceneSet[,1:ncol(trainLabeled)] <- sapply(wholeArceneSet,as.double)
studyName <- "ARCENE"
dataframe <- wholeArceneSet
outcome <- "Labels"
thro <- 0.8
cexheat = 0.10
TopVariables <- 10
Some libraries
library(psych)
library(whitening)
library("vioplot")
library("rpart")
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 200 | 10000 |
pander::pander(table(dataframe[,outcome]))
| 0 | 1 |
|---|---|
| 112 | 88 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
largeSet <- length(varlist) > 1500
Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns
### Some global cleaning
sdiszero <- apply(dataframe,2,sd) > 1.0e-16
dataframe <- dataframe[,sdiszero]
varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
dataframe <- dataframe[,tokeep]
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples
dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> Included: 7748 , Uni p: 0.01544774 , Uncorrelated Base: 920 , Outcome-Driven Size: 0 , Base Size: 920
#>
#>
1 <R=1.000,r=0.975,N= 6654>, Top: 373( 47 )...=[ 2 : 373 Fa= 371 : 0.991 ]( 371 , 2281 , 0 ),<|>Tot Used: 2652 , Added: 2281 , Zero Std: 0 , Max Cor: 1.000
#>
2 <R=1.000,r=0.975,N= 6654>, Top: 875( 22 )........=[ 2 : 875 Fa= 1233 : 0.992 ]( 862 , 2079 , 371 ),<|>Tot Used: 4588 , Added: 2079 , Zero Std: 0 , Max Cor: 1.000
#>
3 <R=1.000,r=0.975,N= 6654>, Top: 738( 9 ).......=[ 2 : 738 Fa= 1957 : 0.992 ]( 729 , 1470 , 1233 ),<|>Tot Used: 5682 , Added: 1470 , Zero Std: 0 , Max Cor: 1.000
#>
4 <R=1.000,r=0.975,N= 6654>, Top: 422( 17 )....=[ 2 : 422 Fa= 2371 : 0.990 ]( 416 , 767 , 1957 ),<|>Tot Used: 6199 , Added: 767 , Zero Std: 0 , Max Cor: 1.000
#>
5 <R=1.000,r=0.975,N= 6654>, Top: 187( 9 ).=[ 2 : 187 Fa= 2557 : 0.989 ]( 186 , 291 , 2371 ),<|>Tot Used: 6387 , Added: 291 , Zero Std: 0 , Max Cor: 0.999
#>
6 <R=0.999,r=0.950,N= 3536>, Top: 1248( 7 )............=[ 2 : 1248 Fa= 2945 : 0.970 ]( 1200 , 1565 , 2557 ),<|>Tot Used: 6600 , Added: 1565 , Zero Std: 0 , Max Cor: 0.999
#>
7 <R=0.999,r=0.950,N= 3536>, Top: 498( 7 )....[ 1 : 498 Fa= 3060 : 0.950 ]( 487 , 548 , 2945 ),<|>Tot Used: 6666 , Added: 548 , Zero Std: 0 , Max Cor: 0.999
#>
8 <R=0.999,r=0.950,N= 3536>, Top: 120( 6 ).[ 1 : 120 Fa= 3087 : 0.950 ]( 119 , 141 , 3060 ),<|>Tot Used: 6687 , Added: 141 , Zero Std: 0 , Max Cor: 0.999
#>
9 <R=0.999,r=0.900,N= 2892>, Top: 1117( 1 )..........[ 1 : 1117 Fa= 3332 : 0.923 ]( 1075 , 1330 , 3087 ),<|>Tot Used: 6768 , Added: 1330 , Zero Std: 0 , Max Cor: 0.999
#>
10 <R=0.999,r=0.899,N= 2892>, Top: 277( 1 )..=[ 2 : 277 Fa= 3398 : 0.940 ]( 273 , 358 , 3332 ),<|>Tot Used: 6775 , Added: 358 , Zero Std: 0 , Max Cor: 0.998
#>
11 <R=0.998,r=0.899,N= 2892>, Top: 74( 2 )[ 1 : 74 Fa= 3417 : 0.899 ]( 72 , 124 , 3398 ),<|>Tot Used: 6780 , Added: 124 , Zero Std: 0 , Max Cor: 0.996
#>
12 <R=0.996,r=0.848,N= 2071>, Top: 754( 1 ).......[ 1 : 754 Fa= 3526 : 0.861 ]( 734 , 943 , 3417 ),<|>Tot Used: 6798 , Added: 943 , Zero Std: 0 , Max Cor: 0.995
#>
13 <R=0.995,r=0.848,N= 2071>, Top: 234( 1 )..[ 1 : 234 Fa= 3561 : 0.848 ]( 222 , 329 , 3526 ),<|>Tot Used: 6799 , Added: 329 , Zero Std: 0 , Max Cor: 0.995
#>
14 <R=0.995,r=0.847,N= 2071>, Top: 59( 1 )[ 1 : 59 Fa= 3564 : 0.847 ]( 58 , 88 , 3561 ),<|>Tot Used: 6803 , Added: 88 , Zero Std: 0 , Max Cor: 0.958
#>
15 <R=0.958,r=0.800,N= 1586>, Top: 568( 1 ).....[ 1 : 568 Fa= 3635 : 0.800 ]( 559 , 721 , 3564 ),<|>Tot Used: 6813 , Added: 721 , Zero Std: 0 , Max Cor: 0.988
#>
16 <R=0.988,r=0.800,N= 1586>, Top: 155( 1 ).[ 1 : 155 Fa= 3656 : 0.800 ]( 153 , 240 , 3635 ),<|>Tot Used: 6815 , Added: 240 , Zero Std: 0 , Max Cor: 0.965
#>
17 <R=0.965,r=0.800,N= 1586>, Top: 45( 1 )[ 1 : 45 Fa= 3661 : 0.800 ]( 45 , 67 , 3656 ),<|>Tot Used: 6815 , Added: 67 , Zero Std: 0 , Max Cor: 0.846
#>
18 <R=0.846,r=0.800,N= 15>, Top: 6( 4 )[ 1 : 6 Fa= 3661 : 0.800 ]( 6 , 9 , 3661 ),<|>Tot Used: 6815 , Added: 9 , Zero Std: 0 , Max Cor: 0.891
#>
19 <R=0.891,r=0.800,N= 15>, Top: 2( 1 )[ 1 : 2 Fa= 3662 : 0.800 ]( 2 , 2 , 3661 ),<|>Tot Used: 6815 , Added: 2 , Zero Std: 0 , Max Cor: 0.800
#>
20 <R=0.800,r=0.800,N= 0>
#>
[ 20 ], 0.7999522 Decor Dimension: 6815 Nused: 6815 . Cor to Base: 1356 , ABase: 50 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
63594442
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
6183186
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
3.08
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
1.7
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPSTM <- attr(DEdataframe,"UPSTM")
gplots::heatmap.2(1.0*(abs(UPSTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
if (!largeSet)
{
cormat <- cor(DEdataframe[,varlistc],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Correlation after IDeA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
par(op)
diag(cormat) <- 0
print(max(abs(cormat)))
}
if (nrow(dataframe) < 1000)
{
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}
if (nrow(dataframe) < 1000)
{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
100 : V100 200 : V201 300 : V302 400 : V402 500 : V504
600 : V606 700 : V707 800 : V807 900 : V907 1000 : V1008
1100 : V1108 1200 : V1209 1300 : V1309 1400 : V1409 1500 : V1509
1600 : V1610 1700 : V1710 1800 : V1810 1900 : V1911 2000 : V2012
2100 : V2113 2200 : V2213 2300 : V2313 2400 : V2417 2500 : V2518
2600 : V2620 2700 : V2722 2800 : V2822 2900 : V2922 3000 : V3023
3100 : V3123 3200 : V3223 3300 : V3326 3400 : V3428 3500 : V3528
3600 : V3629 3700 : V3734 3800 : V3835 3900 : V3935 4000 : V4038
4100 : V4140 4200 : V4243 4300 : V4344 4400 : V4445 4500 : V4547
4600 : V4649 4700 : V4751 4800 : V4853 4900 : V4954 5000 : V5055
5100 : V5156 5200 : V5256 5300 : V5360 5400 : V5462 5500 : V5564
5600 : V5666 5700 : V5768 5800 : V5868 5900 : V5970 6000 : V6070
6100 : V6171 6200 : V6271 6300 : V6372 6400 : V6473 6500 : V6573
6600 : V6675 6700 : V6777 6800 : V6881 6900 : V6983 7000 : V7088
7100 : V7190 7200 : V7291 7300 : V7393 7400 : V7496 7500 : V7597
7600 : V7701 7700 : V7803 7800 : V7904 7900 : V8007 8000 : V8108
8100 : V8209 8200 : V8310 8300 : V8414 8400 : V8516 8500 : V8620
8600 : V8721 8700 : V8822 8800 : V8925 8900 : V9026 9000 : V9128
9100 : V9232 9200 : V9332 9300 : V9433 9400 : V9533 9500 : V9638
9600 : V9739 9700 : V9841 9800 : V9944
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
100 : V100 200 : La_V201 300 : V302 400 : La_V402 500 : V504
600 : V606 700 : V707 800 : V807 900 : La_V907 1000 : La_V1008
1100 : La_V1108 1200 : V1209 1300 : La_V1309 1400 : La_V1409 1500 :
La_V1509
1600 : La_V1610 1700 : La_V1710 1800 : La_V1810 1900 : La_V1911 2000 :
La_V2012
2100 : La_V2113 2200 : La_V2213 2300 : V2313 2400 : La_V2417 2500 :
La_V2518
2600 : V2620 2700 : La_V2722 2800 : V2822 2900 : V2922 3000 :
La_V3023
3100 : La_V3123 3200 : La_V3223 3300 : V3326 3400 : V3428 3500 :
La_V3528
3600 : La_V3629 3700 : La_V3734 3800 : La_V3835 3900 : La_V3935 4000 :
La_V4038
4100 : La_V4140 4200 : La_V4243 4300 : V4344 4400 : V4445 4500 :
La_V4547
4600 : La_V4649 4700 : La_V4751 4800 : V4853 4900 : La_V4954 5000 :
La_V5055
5100 : V5156 5200 : La_V5256 5300 : V5360 5400 : La_V5462 5500 :
La_V5564
5600 : La_V5666 5700 : La_V5768 5800 : La_V5868 5900 : La_V5970 6000 :
La_V6070
6100 : La_V6171 6200 : La_V6271 6300 : La_V6372 6400 : La_V6473 6500 :
V6573
6600 : La_V6675 6700 : La_V6777 6800 : V6881 6900 : La_V6983 7000 :
La_V7088
7100 : La_V7190 7200 : V7291 7300 : La_V7393 7400 : La_V7496 7500 :
La_V7597
7600 : La_V7701 7700 : La_V7803 7800 : La_V7904 7900 : V8007 8000 :
La_V8108
8100 : La_V8209 8200 : La_V8310 8300 : La_V8414 8400 : La_V8516 8500 :
V8620
8600 : La_V8721 8700 : La_V8822 8800 : La_V8925 8900 : V9026 9000 :
V9128
9100 : La_V9232 9200 : La_V9332 9300 : La_V9433 9400 : V9533 9500 :
La_V9638
9600 : La_V9739 9700 : La_V9841 9800 : V9944
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| V5005 | 314.7 | 72.9 | 239 | 83.6 | 0.18466 | 0.772 |
| V4960 | 47.5 | 49.3 | 124 | 97.3 | 0.19534 | 0.751 |
| V2309 | 43.1 | 45.3 | 113 | 89.0 | 0.18665 | 0.751 |
| V8368 | 44.9 | 46.1 | 116 | 90.6 | 0.21091 | 0.751 |
| V312 | 47.2 | 48.0 | 122 | 94.7 | 0.21678 | 0.750 |
| V3365 | 46.3 | 46.9 | 119 | 92.5 | 0.22139 | 0.749 |
| V9617 | 40.9 | 44.7 | 109 | 87.5 | 0.15591 | 0.749 |
| V414 | 47.5 | 50.4 | 125 | 100.4 | 0.16265 | 0.749 |
| V9092 | 33.9 | 63.1 | 124 | 132.6 | 0.00199 | 0.748 |
| V1936 | 316.0 | 79.5 | 243 | 79.2 | 0.28495 | 0.748 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]
pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| La_V9413 | -5.86 | 7.833 | 5.081 | 9.70 | 2.68e-05 | 0.816 |
| La_V7908 | 24.72 | 17.813 | 3.389 | 19.31 | 1.34e-03 | 0.802 |
| La_V316 | -8.88 | 8.424 | -0.250 | 9.09 | 1.78e-04 | 0.789 |
| La_V6132 | -2.98 | 4.591 | 6.202 | 11.86 | 1.18e-07 | 0.766 |
| La_V1620 | 4.47 | 3.705 | -0.125 | 5.92 | 3.93e-05 | 0.764 |
| La_V2652 | 5.59 | 10.949 | -3.707 | 11.11 | 2.47e-03 | 0.757 |
| La_V8996 | 4.26 | 3.278 | 1.231 | 3.19 | 5.62e-02 | 0.751 |
| La_V400 | -10.15 | 9.172 | -1.096 | 9.23 | 9.45e-04 | 0.746 |
| La_V1163 | -1.10 | 0.836 | 0.142 | 1.86 | 1.52e-10 | 0.743 |
| La_V2536 | -3.26 | 3.193 | -0.180 | 4.24 | 3.08e-05 | 0.735 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
| mean | total | fraction |
|---|---|---|
| 3.14 | 6619 | 0.672 |
allSigvars <- names(dc)
dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
coef <- theFormulas[[dx]]
cname <- names(theFormulas[[dx]])
names(cname) <- cname
for (cf in names(coef))
{
if (cf != dx)
{
if (coef[cf]>0)
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
}
else
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("%5.3f*%s",coef[cf],cname[cf]))
}
}
}
}
finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| DecorFormula | caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | RAWAUC | fscores | |
|---|---|---|---|---|---|---|---|---|---|
| La_V9413 | -1.062V3945 + 1.000V9413 | -5.86 | 7.833 | 5.081 | 9.70 | 2.68e-05 | 0.816 | 0.547 | 4 |
| La_V7908 | -2.243V2026 -1.042V4192 + 2.264V6826 + 1.000V7908 | 24.72 | 17.813 | 3.389 | 19.31 | 1.34e-03 | 0.802 | 0.729 | -1 |
| La_V316 | + 1.000V316 -1.074V6388 | -8.88 | 8.424 | -0.250 | 9.09 | 1.78e-04 | 0.789 | 0.545 | 2 |
| La_V6132 | -0.933V845 + 1.000V6132 | -2.98 | 4.591 | 6.202 | 11.86 | 1.18e-07 | 0.766 | 0.628 | 1 |
| La_V1620 | + 1.000V1620 -1.054V2371 | 4.47 | 3.705 | -0.125 | 5.92 | 3.93e-05 | 0.764 | 0.537 | 1 |
| La_V2652 | + 1.383V298 + 1.000V2652 -1.561V5748 -0.899V9074 | 5.59 | 10.949 | -3.707 | 11.11 | 2.47e-03 | 0.757 | 0.624 | -1 |
| La_V8996 | -1.037V649 -0.372V2026 + 0.375V6826 + 1.334V8402 -0.281*V8996 | 4.26 | 3.278 | 1.231 | 3.19 | 5.62e-02 | 0.751 | 0.690 | 1 |
| La_V400 | + 1.000V400 -0.952V521 | -10.15 | 9.172 | -1.096 | 9.23 | 9.45e-04 | 0.746 | 0.545 | 2 |
| La_V1163 | + 1.000V1163 -0.490V4126 -0.516*V7512 | -1.10 | 0.836 | 0.142 | 1.86 | 1.52e-10 | 0.743 | 0.549 | -2 |
| La_V2536 | -1.206V1788 + 1.000V2536 + 0.203*V6444 | -3.26 | 3.193 | -0.180 | 4.24 | 3.08e-05 | 0.735 | 0.597 | -2 |
| V7908 | NA | 367.01 | 216.188 | 219.241 | 182.76 | 6.24e-05 | 0.729 | 0.729 | NA |
| V4192 | NA | 347.32 | 194.538 | 222.893 | 183.71 | 4.84e-05 | 0.725 | 0.725 | NA |
| V8996 | NA | 105.10 | 71.796 | 56.955 | 57.93 | 1.86e-03 | 0.690 | 0.690 | NA |
| V8402 | NA | 101.39 | 70.064 | 60.286 | 58.00 | 1.79e-03 | 0.670 | 0.670 | NA |
| V649 | NA | 100.98 | 66.569 | 63.554 | 57.64 | 1.73e-03 | 0.667 | 0.667 | NA |
| V845 | NA | 154.08 | 120.432 | 102.446 | 119.50 | 3.78e-04 | 0.648 | 0.648 | NA |
| V2026 | NA | 202.09 | 88.768 | 253.812 | 124.53 | 3.78e-03 | 0.634 | 0.634 | NA |
| V6132 | NA | 140.75 | 111.236 | 101.768 | 114.76 | 7.54e-04 | 0.628 | 0.628 | NA |
| V2652 | NA | 206.14 | 136.182 | 143.571 | 137.79 | 1.23e-03 | 0.624 | 0.624 | NA |
| V6444 | NA | 136.08 | 113.563 | 91.643 | 103.79 | 5.84e-04 | 0.623 | 0.623 | NA |
| V6826 | NA | 208.90 | 91.170 | 258.750 | 123.92 | 1.18e-02 | 0.620 | 0.620 | NA |
| V9074 | NA | 203.03 | 153.722 | 148.054 | 150.51 | 1.62e-04 | 0.615 | 0.615 | NA |
| V1788 | NA | 91.43 | 67.831 | 65.071 | 66.60 | 2.19e-03 | 0.613 | 0.613 | NA |
| V2536 | NA | 79.44 | 57.450 | 59.732 | 59.90 | 6.15e-04 | 0.597 | 0.597 | NA |
| V3945 | NA | 123.56 | 102.080 | 101.902 | 102.09 | 2.69e-03 | 0.571 | 0.571 | NA |
| V6388 | NA | 110.74 | 105.732 | 82.929 | 87.26 | 2.87e-03 | 0.567 | 0.567 | NA |
| V7512 | NA | 169.35 | 101.193 | 144.098 | 117.59 | 3.23e-04 | 0.560 | 0.560 | NA |
| V5748 | NA | 100.36 | 75.725 | 85.402 | 77.29 | 8.04e-04 | 0.556 | 0.556 | NA |
| V521 | NA | 174.02 | 170.382 | 160.955 | 165.07 | 1.51e-04 | 0.554 | 0.554 | NA |
| V298 | NA | 100.22 | 87.035 | 86.116 | 84.21 | 3.54e-03 | 0.549 | 0.549 | NA |
| V1163 | NA | 172.58 | 99.246 | 150.027 | 121.06 | 2.52e-04 | 0.549 | 0.549 | NA |
| V9413 | NA | 125.39 | 109.470 | 113.321 | 109.31 | 3.60e-03 | 0.547 | 0.547 | NA |
| V316 | NA | 110.00 | 112.162 | 88.777 | 96.85 | 1.45e-03 | 0.545 | 0.545 | NA |
| V400 | NA | 155.44 | 163.730 | 152.062 | 156.57 | 1.12e-05 | 0.545 | 0.545 | NA |
| V4126 | NA | 176.26 | 99.204 | 154.268 | 123.15 | 1.99e-04 | 0.538 | 0.538 | NA |
| V1620 | NA | 34.19 | 26.851 | 31.830 | 33.88 | 1.01e-04 | 0.537 | 0.537 | NA |
| V2371 | NA | 28.20 | 23.897 | 30.321 | 32.33 | 1.05e-04 | 0.490 | 0.490 | NA |
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}
pander::pander(table(dataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 109 | 3 |
| 1 | 13 | 75 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.3900 | 0.32199 | 0.4613 |
| tp | 0.4400 | 0.37006 | 0.5117 |
| se | 0.8523 | 0.76064 | 0.9189 |
| sp | 0.9732 | 0.92371 | 0.9944 |
| diag.ac | 0.9200 | 0.87334 | 0.9536 |
| diag.or | 209.6154 | 57.73802 | 760.9996 |
| nndx | 1.2114 | 1.09485 | 1.4612 |
| youden | 0.8255 | 0.68435 | 0.9134 |
| pv.pos | 0.9615 | 0.89169 | 0.9920 |
| pv.neg | 0.8934 | 0.82468 | 0.9420 |
| lr.pos | 31.8182 | 10.38462 | 97.4900 |
| lr.neg | 0.1518 | 0.09181 | 0.2510 |
| p.rout | 0.6100 | 0.53868 | 0.6780 |
| p.rin | 0.3900 | 0.32199 | 0.4613 |
| p.tpdn | 0.0268 | 0.00556 | 0.0763 |
| p.tndp | 0.1477 | 0.08107 | 0.2394 |
| p.dntp | 0.0385 | 0.00800 | 0.1083 |
| p.dptn | 0.1066 | 0.05797 | 0.1753 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 75 | 3 | 78 |
| Test - | 13 | 109 | 122 |
| Total | 88 | 112 | 200 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.920 | 0.873 | 0.954 |
| 3 | se | 0.852 | 0.761 | 0.919 |
| 4 | sp | 0.973 | 0.924 | 0.994 |
| 6 | diag.or | 209.615 | 57.738 | 761.000 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}
pander::pander(table(DEdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 106 | 6 |
| 1 | 10 | 78 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.4200 | 0.3507 | 0.492 |
| tp | 0.4400 | 0.3701 | 0.512 |
| se | 0.8864 | 0.8009 | 0.944 |
| sp | 0.9464 | 0.8870 | 0.980 |
| diag.ac | 0.9200 | 0.8733 | 0.954 |
| diag.or | 137.8000 | 48.0526 | 395.168 |
| nndx | 1.2008 | 1.0820 | 1.454 |
| youden | 0.8328 | 0.6880 | 0.924 |
| pv.pos | 0.9286 | 0.8510 | 0.973 |
| pv.neg | 0.9138 | 0.8472 | 0.958 |
| lr.pos | 16.5455 | 7.5693 | 36.166 |
| lr.neg | 0.1201 | 0.0669 | 0.216 |
| p.rout | 0.5800 | 0.5083 | 0.649 |
| p.rin | 0.4200 | 0.3507 | 0.492 |
| p.tpdn | 0.0536 | 0.0199 | 0.113 |
| p.tndp | 0.1136 | 0.0559 | 0.199 |
| p.dntp | 0.0714 | 0.0267 | 0.149 |
| p.dptn | 0.0862 | 0.0421 | 0.153 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 78 | 6 | 84 |
| Test - | 10 | 106 | 116 |
| Total | 88 | 112 | 200 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.920 | 0.873 | 0.954 |
| 3 | se | 0.886 | 0.801 | 0.944 |
| 4 | sp | 0.946 | 0.887 | 0.980 |
| 6 | diag.or | 137.800 | 48.053 | 395.168 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}
pander::pander(table(PCAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 96 | 16 |
| 1 | 16 | 72 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.440 | 0.3701 | 0.512 |
| tp | 0.440 | 0.3701 | 0.512 |
| se | 0.818 | 0.7216 | 0.892 |
| sp | 0.857 | 0.7784 | 0.916 |
| diag.ac | 0.840 | 0.7817 | 0.888 |
| diag.or | 27.000 | 12.6607 | 57.580 |
| nndx | 1.481 | 1.2370 | 2.000 |
| youden | 0.675 | 0.5000 | 0.808 |
| pv.pos | 0.818 | 0.7216 | 0.892 |
| pv.neg | 0.857 | 0.7784 | 0.916 |
| lr.pos | 5.727 | 3.6003 | 9.111 |
| lr.neg | 0.212 | 0.1353 | 0.333 |
| p.rout | 0.560 | 0.4883 | 0.630 |
| p.rin | 0.440 | 0.3701 | 0.512 |
| p.tpdn | 0.143 | 0.0839 | 0.222 |
| p.tndp | 0.182 | 0.1076 | 0.278 |
| p.dntp | 0.182 | 0.1076 | 0.278 |
| p.dptn | 0.143 | 0.0839 | 0.222 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 72 | 16 | 88 |
| Test - | 16 | 96 | 112 |
| Total | 88 | 112 | 200 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.840 | 0.782 | 0.888 |
| 3 | se | 0.818 | 0.722 | 0.892 |
| 4 | sp | 0.857 | 0.778 | 0.916 |
| 6 | diag.or | 27.000 | 12.661 | 57.580 |
par(op)
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}
pander::pander(table(EFAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 109 | 3 |
| 1 | 13 | 75 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.3900 | 0.32199 | 0.4613 |
| tp | 0.4400 | 0.37006 | 0.5117 |
| se | 0.8523 | 0.76064 | 0.9189 |
| sp | 0.9732 | 0.92371 | 0.9944 |
| diag.ac | 0.9200 | 0.87334 | 0.9536 |
| diag.or | 209.6154 | 57.73802 | 760.9996 |
| nndx | 1.2114 | 1.09485 | 1.4612 |
| youden | 0.8255 | 0.68435 | 0.9134 |
| pv.pos | 0.9615 | 0.89169 | 0.9920 |
| pv.neg | 0.8934 | 0.82468 | 0.9420 |
| lr.pos | 31.8182 | 10.38462 | 97.4900 |
| lr.neg | 0.1518 | 0.09181 | 0.2510 |
| p.rout | 0.6100 | 0.53868 | 0.6780 |
| p.rin | 0.3900 | 0.32199 | 0.4613 |
| p.tpdn | 0.0268 | 0.00556 | 0.0763 |
| p.tndp | 0.1477 | 0.08107 | 0.2394 |
| p.dntp | 0.0385 | 0.00800 | 0.1083 |
| p.dptn | 0.1066 | 0.05797 | 0.1753 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 75 | 3 | 78 |
| Test - | 13 | 109 | 122 |
| Total | 88 | 112 | 200 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.920 | 0.873 | 0.954 |
| 3 | se | 0.852 | 0.761 | 0.919 |
| 4 | sp | 0.973 | 0.924 | 0.994 |
| 6 | diag.or | 209.615 | 57.738 | 761.000 |
par(op)